# Ridership Data Download + ZIP table
library(httr2)
library(readxl)
library(dplyr)
library(readr)
library(DT)
library(htmltools)
library(stringr)
dir.create("data/courseproject", recursive = TRUE, showWarnings = FALSE)
station_ridership_url <- "https://www.mta.info/document/137106"
station_ridership_path <- file.path("data", "courseproject", "station_ridership.xlsx")
# Download the Excel file once, then reuse it
if (!file.exists(station_ridership_path)) {
request(station_ridership_url) |>
req_perform(path = station_ridership_path)
}
# Read the FIRST sheet (sheet name in the file is a bit messy)
station_ridership_raw <- read_excel(station_ridership_path)
station_ridership_clean <- station_ridership_raw %>%
rename(
Station = `Average Weekday Subway Ridership`,
Borough = ...3,
rid_2018 = ...4,
rid_2019 = ...5,
rid_2020 = ...6,
rid_2021 = ...7,
rid_2022 = ...8,
rid_2023 = ...9,
Rank_2023 = ...12
) %>%
filter(
!is.na(Station),
Station != "Station (alphabetical by borough)"
) %>%
filter(
!is.na(Borough),
Borough %in% c("Bx", "B", "M", "Q", "SI")
) %>%
select(
Station,
Borough,
rid_2018,
rid_2019,
rid_2020,
rid_2021,
rid_2022,
rid_2023
) %>%
# round ridership to whole numbers
mutate(
across(starts_with("rid_"), ~ round(., 0))
) %>%
# average across 2018–2023 and rank by that average
mutate(
Avg_2018_2023 = round(rowMeans(across(starts_with("rid_")), na.rm = TRUE), 0),
Avg_Rank = min_rank(dplyr::desc(Avg_2018_2023))
)
# 2023-only version for normalization later
station_ridership_2023 <- station_ridership_clean %>%
select(
Station,
Borough,
Ridership_2023 = rid_2023
)
# Save cleaned datasets
write_csv(
station_ridership_clean,
file.path("data/courseproject", "station_ridership_annual_2018_2023_clean.csv")
)
write_csv(
station_ridership_2023,
file.path("data/courseproject", "station_ridership_2023_clean.csv")
)
# Add ZIP codes
lookup_path <- file.path("data/courseproject", "station_complex_zip_lookup.csv")
station_zip_lookup <- read_csv(lookup_path, show_col_types = FALSE)
norm_station <- function(x) {
x %>%
str_to_lower() %>%
str_replace_all("\\.", "") %>%
str_replace_all("\\s+", " ") %>%
str_trim()
}
od_with_zip <- station_zip_lookup %>%
mutate(Station_norm = norm_station(Station)) %>%
select(Station_norm, Zipcode)
ridership_norm <- station_ridership_clean %>%
mutate(Station_norm = norm_station(Station))
matches <- sapply(
ridership_norm$Station_norm,
function(nm) which.min(adist(nm, od_with_zip$Station_norm))
)
station_ridership_with_zip <- ridership_norm %>%
mutate(Zipcode = od_with_zip$Zipcode[matches]) %>%
select(
Station,
Borough,
Zipcode,
rid_2018,
rid_2019,
rid_2020,
rid_2021,
rid_2022,
rid_2023,
Avg_2018_2023,
Avg_Rank
)
write_csv(
station_ridership_with_zip,
file.path("data/courseproject", "station_ridership_with_zip.csv")
)
# Interactive table
if (!exists("mta_blue")) {
mta_blue <- "#0039A6"
}
ridership_dt <- datatable(
station_ridership_with_zip,
rownames = FALSE,
colnames = c(
"Station",
"Borough",
"ZIP",
"2018",
"2019",
"2020",
"2021",
"2022",
"2023",
"2018–2023 Avg.",
"Avg. Rank"
),
options = list(
pageLength = 10,
autoWidth = FALSE,
scrollX = FALSE,
dom = '<"top">t<"bottom"ipf>',
columnDefs = list(
list(width = "220px", targets = 0),
list(width = "40px", targets = 1),
list(width = "60px", targets = 2), # ZIP
list(width = "70px", targets = 3:9), # years
list(width = "85px", targets = 10), # avg
list(width = "70px", targets = 11), # rank
list(className = "dt-right", targets = 3:11)
),
initComplete = JS(
"function() {",
" var api = this.api();",
" var $table = $(api.table().container());",
" var $filter = $table.find('div.dataTables_filter');",
" $filter.appendTo($table.find('div.bottom'));",
"}"
)
),
class = "cell-border stripe hover compact"
)
ridership_widget <- tagList(
tags$style(HTML("
div.dataTables_wrapper {
max-width: 950px;
width: 100%;
margin-left: auto;
margin-right: auto;
}
table.dataTable th,
table.dataTable td {
padding: 4px 6px;
}
table.dataTable {
font-family: Calibri, Arial, sans-serif;
}
table.dataTable.stripe tbody tr.even {
background-color: #f2f2f2;
}
table.dataTable.stripe tbody tr.odd {
background-color: #ffffff;
}
")),
tags$div(
style = "max-width:950px; width:100%; margin-left:auto; margin-right:auto;",
tags$div(
style = paste0(
"background-color:", mta_blue, ";",
"color:white;",
"font-family:Calibri, Arial, sans-serif;",
"font-size:20px;",
"text-align:center;",
"padding:8px;",
"border-radius:4px;",
"margin-bottom:8px;"
),
"Annual Station Ridership, 2018–2023"
),
ridership_dt
)
)
ridership_widget